#Area Crosswalks
area_codes <- readRDS(here("State Data/area_crosswalk.RDS")) %>%
mutate(area_title = str_remove(area_title, " -- Statewide")) %>%
mutate(area_fips = case_when(
nchar(area_fips) == 4 ~ paste("0", area_fips, sep = ""),
TRUE ~ area_fips
))
states <- data.frame(state_abbr = state.abb, area_title = state.name)
area_codes <- left_join(area_codes, states) %>%
mutate(state_abbr = case_when(
area_title == "District of Columbia" ~ "DC",
TRUE ~ state_abbr))
## Joining with `by = join_by(area_title)`
#Map Data
county_sf <- counties(cb = TRUE) %>%
shift_geometry(position = "outside")
## Retrieving data for the year 2021
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
states_sf <- states(cb = TRUE, resolution = "20m") %>%
shift_geometry(position = "outside")
## Retrieving data for the year 2021
##
|
| | 0%
|
|============= | 19%
|
|======================== | 34%
|
|============================================= | 64%
|
|================================================ | 69%
|
|====================================================== | 77%
|
|============================================================ | 86%
|
|======================================================================| 100%
#Convert county data to a table
county_data <- counties_leaflet %>%
as_tibble() %>%
select(STATEFP, COUNTYFP) %>%
mutate(area_fips = paste(STATEFP, COUNTYFP, sep = ""),
st = as.numeric(STATEFP))
#Get center of each county
county_centers <- counties_leaflet %>%
filter(str_detect(NAME, "Mariana", negate = TRUE)) %>%
st_centroid() %>%
sf::st_transform('+proj=longlat +datum=WGS84')
## Warning: st_centroid assumes attributes are constant over geometries
#Load Existing Data
qcew_3digits <- readRDS(here("State Data/qcew_3digit.RDS")) %>%
mutate(industry_desc = substring(industry_title, 11))
#Simplify to get industry codes
ind_3digit <- qcew_3digits %>%
select(naics_3digit = ind_code, naics_3digit_label = industry_desc) %>%
distinct() %>%
arrange(naics_3digit_label)
#Define colors
emp_ind_vector <- c(brewer.pal(9, "Greys")[4], "#cf4633", "#BEAED4", "#FDC086", "#FDBF6F", "#386CB0", "#FB8072", brewer.pal(9, "Greys")[3], brewer.pal(9, "Greys")[8], "#80B1D3", "#F0027F", "#4DAF4A", "#F1E2CC", "#6A3D9A", "#E78AC3", "#CBD5E8", "#666666", brewer.pal(9, "Greys")[5], brewer.pal(9, "Greys")[6], "#A65628", brewer.pal(9, "Greys")[7])
This document cleans and summarizes input-output use and supply tables.
#read data
cbp_2019 <- read.csv(here("County Data/5_digit_naics_2019.csv")) %>%
mutate(area_fips = str_sub(GEO_ID, -5)) %>%
left_join(county_data) %>%
left_join(area_codes %>% select(st, state_abbr)) %>%
mutate(emp_num = as.numeric(EMP))
## Joining with `by = join_by(area_fips)`
## Joining with `by = join_by(st)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `emp_num = as.numeric(EMP)`.
## Caused by warning:
## ! NAs introduced by coercion
use_table_raw <- read.csv(here("Input Output Data/USE_TABLE_2017/2017-Table 1.csv"))
We create a crosswalk between industry codes and their descriptions.
use_cols <- colnames(use_table_raw)
ind_codes <- use_table_raw %>%
head(1) %>%
pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "industry_code") %>%
select(industry_code, industry_desc)
The first step will be to convert this data into a long data format.
use_table_long <- use_table_raw[-1,] %>%
pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "value", values_drop_na = TRUE) %>%
mutate(value_clean = str_remove_all(value, "[[:punct:]]"),
value_num = as.numeric(value_clean)) %>%
filter(!is.na(value_num)) %>%
rename(commodity_code = 1, commodity_desc = 2) %>%
left_join(ind_codes)
## Joining with `by = join_by(industry_desc)`
The resulting dataframe details how individuals industries use specific commodities. E.G the DEMAND for specific INPUTS from industires. We add a few additional descriptive columns to be able to segment and sort our data. Values in this data are then the total amount in millions of dollars of specific inputs used by specific industries.
use_table_clean <- use_table_long %>%
mutate(com_code_3 = as.numeric(str_sub(commodity_code, 1, 3)),
naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>%
left_join(ind_3digit)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `com_code_3 = as.numeric(str_sub(commodity_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Joining with `by = join_by(naics_3digit)`
We note that some of the commodity codes refer to aggregate measures. The same is true for some of the industry codes.
agg_coms <- use_table_clean %>% select(commodity_code, commodity_desc, com_code_3) %>% distinct() %>% filter(is.na(com_code_3))
agg_coms <- agg_coms[-c(1:2),]
agg_coms %>% select(1:2)
## # A tibble: 20 × 2
## commodity_code commodity_desc
## <chr> <chr>
## 1 S00500 Federal general government (defense)
## 2 S00600 Federal general government (nondefense)
## 3 S00102 Other federal government enterprises
## 4 GSLGE State and local government (educational services)
## 5 GSLGH State and local government (hospitals and health services)
## 6 GSLGO State and local government (other services)
## 7 S00203 Other state and local government enterprises
## 8 S00401 Scrap
## 9 S00402 Used and secondhand goods
## 10 S00300 Noncomparable imports
## 11 S00900 Rest of the world adjustment
## 12 T005 Total intermediate inputs
## 13 V00100 Compensation of employees
## 14 T00OTOP Other taxes on production
## 15 V00300 Gross operating surplus
## 16 VABAS Value added (basic value)
## 17 T018 Total industry output (basic value)
## 18 T00TOP Plus: Taxes on products and imports
## 19 T00SUB Less: Subsidies
## 20 VAPRO Value added (producer value)
These codes are useful in informing aggregate measures, such as total use of inputs, total use of imports. In addition, the USE table contains valuable information about total value added by industry. Total commodity output and total industry output are both found in the USE table.
We see that the use table provides a wide variety of measures about both commodity useage, as well as industry output.
agg_ind <- use_table_clean %>% select(industry_code, industry_desc, naics_3digit) %>% distinct() %>% filter(is.na(naics_3digit))
agg_ind <- agg_ind[-c(10),]
agg_ind %>% select(1:2)
## # A tibble: 33 × 2
## industry_code industry_desc
## <chr> <chr>
## 1 S00600 Federal.general.government..nondefense.
## 2 T001 Total.Intermediate
## 3 F03000 Change.in.private.inventories
## 4 F04000 Exports.of.goods.and.services
## 5 T019 Total.use.of.products
## 6 GSLGE State.and.local.government..educational.services.
## 7 GSLGH State.and.local.government..hospitals.and.health.services.
## 8 GSLGO State.and.local.government..other.services.
## 9 F01000 Personal.consumption.expenditures
## 10 4B0000 All.other.retail
## # ℹ 23 more rows
Because of the way that these databases are constructred, the agg_ind dataframe above refers to commodity level summaries, while the agg_com dataframe above refers to industry level summaries. We spend some time at the moment to create some commodity and industry level benchmarks.
agg_coms %>%
filter(commodity_code %in% c("T005", "S00300", "VABAS", "T018", "VAPRO")) %>%
select(1:2)
## # A tibble: 5 × 2
## commodity_code commodity_desc
## <chr> <chr>
## 1 S00300 Noncomparable imports
## 2 T005 Total intermediate inputs
## 3 VABAS Value added (basic value)
## 4 T018 Total industry output (basic value)
## 5 VAPRO Value added (producer value)
We begin with creating benchmarks for value added, total output by industry, use of imports, and use of intermediary inputs.
ind_agg <- use_table_clean %>%
filter(commodity_code %in% c("T005", "S00300", "VABAS", "T018", "VAPRO")) %>%
select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>%
pivot_wider(id_cols = c(industry_code, industry_desc), names_from = commodity_code, values_from = value_num)
colnames(ind_agg) <- c("industry_code", "industry_desc", "imports", "intermediary_inputs", "value_added", "industry_output", "prod_value_added")
This dataframe contains information, aggregated at the INDUSTRY level, about the total of use of inputs by industries, AS WELL as the overall OUTPUT and VALUE ADDED of each industry.
We can immediately produce some summary statistics about overall industry activity here. Again, this work focuses specifically on the manufacturing sector.
manf_ind <- ind_agg %>%
mutate(naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>%
left_join(ind_3digit) %>%
filter(naics_3digit >= 300 & naics_3digit < 400) %>%
#FIX MISSING VALUES WITH 0
mutate(across(.cols = c(3:7), ~ case_when(
is.na(.) ~ 0,
TRUE ~ .)),
log_ind = log(industry_output),
log_value = log(value_added),
log_inputs = log(intermediary_inputs),
val_ratio = value_added/industry_output,
input_ratio = intermediary_inputs/industry_output,
log_imports = log(imports),
import_ratio = imports/intermediary_inputs)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `naics_3digit = as.numeric(str_sub(industry_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## Joining with `by = join_by(naics_3digit)`
ind_pal <- emp_ind_vector[-c(1, 17, 9)]
ind_output <- manf_ind %>%
filter(industry_output != 0) %>%
ggplot() +
geom_density_ridges(aes(x = industry_output, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
scale_x_continuous(labels = scales::comma) +
guides(fill = "none") +
labs(x = "2017 Total Industry Output (Millions of $)", y = "3 Digit NAICS Group") +
theme_bw() +
axis_theme
ind_output
## Picking joint bandwidth of 9620
ind_output_log <- manf_ind %>%
filter(industry_output != 0) %>%
ggplot() +
geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
scale_x_continuous(trans = scales::log_trans()) +
guides(fill = "none") +
labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") +
theme_bw() +
axis_theme
ind_output_log
## Picking joint bandwidth of 0.0416
ind_output_comp <- ind_output + ind_output_log
ind_output_comp
## Picking joint bandwidth of 9620
## Picking joint bandwidth of 0.0416
We might also be interested in the distribution of different measures of value added across manufacturing sectors. This distribution is close to the total output distribution, but has slightly flatter peaks, and some shifted distributions.
ind_value_add <- manf_ind %>%
ggplot() +
geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
scale_x_continuous(trans = scales::log_trans()) +
guides(fill = "none") +
labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") +
theme_bw() +
axis_theme
ind_value_add
## Picking joint bandwidth of 0.0464
ind_value_add +
geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.5) +
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.398
##
## Picking joint bandwidth of 0.397
However, when we examine the ratio of value added per industry output, we see a very different distribution across manufacturing sub-sectors, with certain sub-sectors housing industries with much higher value added.
value_add_ratio <- manf_ind %>%
ggplot() +
geom_density_ridges(aes(x = val_ratio, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
guides(fill = "none") +
labs(x = "Value Added Per Output, 2017", y = "") +
theme_bw() +
axis_theme
value_add_ratio
## Picking joint bandwidth of 0.0311
We see that the distribution of use of inputs follows the distribution of total output very closely.
input_sum <- manf_ind %>%
ggplot() +
geom_density_ridges(aes(x = log_inputs, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
scale_x_continuous(trans = scales::log_trans()) +
guides(fill = "none") +
labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") +
theme_bw() +
axis_theme +
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
input_sum
## Picking joint bandwidth of 0.388
input_sum +
geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.8) +
geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.3, color = "white") +
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16)) +
theme_dark()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.388
##
## Picking joint bandwidth of 0.398
##
## Picking joint bandwidth of 0.397
However, when examining the intensity of use of intermediary inputs (e.g. the ratio of intermediary inputs to output), the distribution of each 3 Digit NAICS Manufacturing sector changes substantially.
input_ratio <- manf_ind %>%
ggplot() +
geom_density_ridges(aes(x = input_ratio, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
guides(fill = "none") +
labs(x = "2017 Ratio of Intermeidary Inputs to Industry Output", y = "") +
theme_bw() +
axis_theme
input_ratio
## Picking joint bandwidth of 0.0311
input_ratio +
geom_density_ridges(aes(x = val_ratio, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.7)
## Picking joint bandwidth of 0.0311
## Picking joint bandwidth of 0.0311
We conclude these summary statistics by examining the intensity of use of imports by manufacturing sub-sector.
import_sum <- manf_ind %>%
filter(imports != 0) %>%
ggplot() +
geom_density_ridges(aes(x = log_imports, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
guides(fill = "none") +
labs(x = "2017 Log Imports", y = "") +
theme_bw() +
axis_theme
import_sum
## Picking joint bandwidth of 0.508
input_sum +
geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.8) +
geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.3, color = "white") +
geom_density_ridges(data = manf_ind %>%
filter(imports != 0),
aes(x = log_imports, y = naics_3digit_label, fill = naics_3digit_label), color = "grey") +
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16)) +
theme_dark()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.388
##
## Picking joint bandwidth of 0.398
##
## Picking joint bandwidth of 0.397
##
## Picking joint bandwidth of 0.508
import_rat <- manf_ind %>%
ggplot() +
geom_density_ridges(aes(x = import_ratio, y = naics_3digit_label, fill = naics_3digit_label)) +
scale_fill_manual(values = ind_pal) +
guides(fill = "none") +
labs(x = "2017 Ratio of Imports to Intermediate Inputs", y = "") +
theme_bw() +
axis_theme
import_rat
## Picking joint bandwidth of 0.00136
We see that the ratio of intermediary inputs to total output and the ratio of value added to total output, appear to almost mirror each other, supporting the observation that because of how input-output accounting is done, the ratio of intermediary inputs to total output also captures value added in the final production step.
We now create some commodity level summaries, detailing the total use of specific commodities as intermediary inputs, compared to final products. In addition, we include measures of how many of these commodities are exported,
com_agg <- use_table_clean %>%
filter(industry_code %in% c("T001", "F04000", "T019")) %>%
select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>%
pivot_wider(id_cols = c(commodity_code, commodity_desc), names_from = industry_desc, values_from = value_num)
colnames(com_agg) <- c("commodity_code", "commodity_desc", "intermediate", "exports", "total_use")
This dataframe has COMMODITY level summaries. We can use commodity level summaries to control for the input-intensity of a given commodity. ****INSERT HERE: Commodities that have a higher share of intermediate use to total use, are
Then, we can look at the use of specific commodities by industries to understand which industries are using relatively input-intensive *****
We now have aggregate commodity and industry level summaries, and can simplify our original dataframe. We will also need to perform similar aggregation measures in the supply table (the make table)
At this point, we might also want to separate out our commodities into some broad categories. While this categorization might be too granular, it at least allows us some baseline to assess industry use of specific types of inputs. This categorization is based on the NAICS codes used in the SUPPLY/MAKE tables, and roughly varies at the 3-digit level. A few specific inputs, such as R&D, are called out.
use_table_final <- use_table_clean %>%
filter(!is.na(com_code_3), !is.na(naics_3digit)) %>%
mutate(input_type = case_when(
com_code_3 < 113 ~ "Agriculture and Farming",
com_code_3 == 113 | com_code_3 == 115 ~ "Forestry and Logging",
com_code_3 == 114 ~ "Fishing, Hunting, Trapping",
com_code_3 == 211 ~ "Oil and Gas Extraction",
com_code_3 == 212 ~ "Metal Mining",
com_code_3 == 213 ~ "Support for Oil, Gas, and Mining",
com_code_3 == 221 ~ "Electricity, Gas, Water, Sewage, and other systems",
com_code_3 >= 300 & com_code_3 < 400 ~ "Manufactured Input",
com_code_3 >= 400 & com_code_3 < 460 ~ "Other wholesalers, suppliers, retailers",
com_code_3 >= 481 & com_code_3 < 490 ~ "Transportation",
com_code_3 >= 491 & com_code_3 < 493 ~ "Postal Service and other messaging",
com_code_3 >= 500 & com_code_3 < 600 ~ "Software, Information, Financial, Legal, and other services",
com_code_3 == 611 ~ "Junior colleges, colleges, universities, professional schools, and other educational services",
com_code_3 >= 621 & com_code_3 < 700 ~ "Hospitals, medical and diagnostic labs, and other health care services",
com_code_3 == 811 | com_code_3 == 230 ~ "Maintenance & Repair",
com_code_3 == 813 ~ "Grantmaking and Philanthropy",
TRUE ~ "Other inputs (e.g. arts & recreation; accomodation, etc.)"
)) %>%
mutate(input_type = case_when(
commodity_code == "423100" ~ "Motor vehicle and motor vehicle parts and supplies",
commodity_code == "423800" ~ "Machinery, Equipment, Supplies",
commodity_code == "424700" ~ "Petroleum and Petroleum Products",
commodity_code == "541700" ~ "Scientific research and development services",
TRUE ~ input_type
))
#Create dataframe for ordering for later
com_order <- use_table_final %>%
group_by(input_type) %>%
reframe(com_sum = sum(value_num, na.rm = TRUE)) %>%
arrange(input_type) %>%
mutate(alpha_order = seq(n())) %>%
arrange(desc(com_sum)) %>%
mutate(num_order = seq(n()))
We now start by creating some high level summary statistics by industry, focusing specifically on the manufacturing sector. Previous work has categorized 6-digit NAICS sectors into traded, or local (Delgado et al., 2014); or “supply-chain” industries (Delgado and Mills, 2020), but explicitly does not consider the supply chain of specific companies (e.g. GM), or specific industries. In this work, we seek to map out specific supply chains for specific industries, and examine how supply chains organize differently across regions. In the above section, we already provided summary statistics about the total use of inputs by industries. Now, we want to specifically focus on the type of inputs that manufacturers require.
manf_inputs <- use_table_final %>%
filter(naics_3digit >= 300 & naics_3digit < 400) %>%
select(-c(value, value_clean)) %>%
left_join(manf_ind) %>%
mutate(input_share = value_num / intermediary_inputs)
## Joining with `by = join_by(industry_desc, industry_code, naics_3digit,
## naics_3digit_label)`
In total, we have, ‘r manf_inputs %>% select(industry_desc) %>% distinct() %>% nrow()’ (232) different types of manufacturing, at the 6-digit level. To make sense of the variation in use of inputs by manufacturing sub-sector, we start with a 3-level by 3-level summary, acknowledging that there is substantial variation in use of inputs by the different manufacturing sector.
input_list <- manf_inputs %>%
select(input_type) %>%
distinct() %>%
unlist()
input_agg_share <- manf_inputs %>%
group_by(industry_code, industry_desc, naics_3digit_label, input_type) %>%
reframe(input_share = sum(input_share, na.rm = TRUE), intermediary_inputs, value = sum(value_num, na.rm = TRUE)) %>%
distinct()
manf_codes <- manf_inputs %>%
select(naics_3digit_label) %>%
distinct() %>%
unlist()
ExpandColorsLIGHT <- function(colors, n, steps = 11){
if(n <= steps){
suppressWarnings({
sapply(colors, function(x){colorRampPalette(c(x, "#FFFFFF"))(steps)}) %>%
as.data.frame() %>%
filter(row_number() <= n) %>%
gather(key = original.color, value = expanded.color)
})
}else{
warning("Select n < steps!")
}
}
ExpandColorsDARK <- function(colors, n, steps = 11){
if(n <= steps){
suppressWarnings({
sapply(colors, function(x){colorRampPalette(c(x, "#000000"))(steps)}) %>%
as.data.frame() %>%
filter(row_number() <= n) %>%
gather(key = original.color, value = expanded.color)
})
}else{
warning("Select n < steps!")
}
}
input_list %>% unname()
## [1] "Agriculture and Farming"
## [2] "Forestry and Logging"
## [3] "Fishing, Hunting, Trapping"
## [4] "Oil and Gas Extraction"
## [5] "Metal Mining"
## [6] "Electricity, Gas, Water, Sewage, and other systems"
## [7] "Maintenance & Repair"
## [8] "Manufactured Input"
## [9] "Motor vehicle and motor vehicle parts and supplies"
## [10] "Other wholesalers, suppliers, retailers"
## [11] "Machinery, Equipment, Supplies"
## [12] "Petroleum and Petroleum Products"
## [13] "Transportation"
## [14] "Postal Service and other messaging"
## [15] "Other inputs (e.g. arts & recreation; accomodation, etc.)"
## [16] "Software, Information, Financial, Legal, and other services"
## [17] "Scientific research and development services"
## [18] "Junior colleges, colleges, universities, professional schools, and other educational services"
## [19] "Grantmaking and Philanthropy"
input_cols <- c("#4DAF4A", "#D8CBB7", "#C7E1C6", "#7E5F37", "#252525", "#F3C38F" ,"#80B1D3", "#597B93", "#954D24", "#969696", "#386CB0","#E78AC3", "#B1865D", "#D9D9D9", "#BDBDBD", "#cf4633", "#FB8072" , "#F1E2CC", "#703933")
input_col_map <- data.frame(input_type = input_list %>% unname(), input_cols) %>%
mutate(tal = seq(n()))
com_order <- left_join(input_col_map, com_order)
## Joining with `by = join_by(input_type)`
# Map Inputs to Colors
get_col_vec <- function(data){
data %>%
select(input_type) %>%
distinct() %>%
unlist()
}
#Function to get appropriate input colors
get_col_match <- function(data){
col_vec <- get_col_vec(data)
col_out <- input_col_map %>%
filter(input_type %in% col_vec) %>%
select(input_cols) %>%
unlist() %>%
unname()
return(col_out)
}
make_map <- function(number){
df <- input_agg_share %>%
filter(naics_3digit_label %in% manf_codes[number], !is.na(input_share)) %>%
mutate(val_round = round(input_share, 5)) %>%
filter(val_round > 0) %>%
group_by(input_type) %>%
mutate(count = n()) %>%
filter(count >= 3) %>%
ungroup() %>%
left_join(com_order)
col_vec_1 <- df %>%
get_col_match()
# col_vec_2 <- df %>%
# filter(count <3) %>%
# get_col_match()
df %>%
ggplot(aes(x = input_share, y = reorder(input_type, -num_order), fill = reorder(input_type, tal))) +
geom_density_ridges(alpha = 0.7, jittered_points = TRUE, point_alpha=1,point_shape=21) +
geom_text(data = df %>% select(input_type, num_order, tal) %>% distinct(), aes(x = 0.35, y = reorder(input_type, -num_order), label = input_type), color = "black", nudge_y = -.2) +
scale_fill_manual(values = col_vec_1, guide = "none") +
# ggnewscale::new_scale_fill() +
# geom_point(data = df %>% filter(count <3), aes(), shape = 21) +
# scale_fill_manual(values = col_vec_2, guide = "none") +
theme_bw() +
labs(x = "", y = "", title = manf_codes[number], fill = "")
}
make_map_simple <- function(number){
df <- input_agg_share %>%
filter(naics_3digit_label %in% manf_codes[number], !is.na(input_share)) %>%
# group_by(input_type) %>%
# reframe(inputs_total = sum(intermediary_inputs), individual_input = sum(value)) %>%
# mutate(input_share = individual_input / inputs_total) %>%
left_join(com_order)
col_vec_1 <- df %>%
get_col_match()
# col_vec_2 <- df %>%
# filter(count <3) %>%
# get_col_match()
df %>%
ggplot(aes(x = input_share, y = reorder(input_type, -num_order), fill = reorder(input_type, tal)), group = industry_desc) +
geom_col(alpha = 0.7, color = "black", position = "stack") +
geom_text(data = df %>% select(input_type, num_order, tal) %>% distinct(), aes(x = 0.35, y = reorder(input_type, -num_order), label = input_type), color = "black", nudge_y = -.2) +
scale_fill_manual(values = col_vec_1, guide = "none") +
guides(color = "none") +
# ggnewscale::new_scale_fill() +
# geom_point(data = df %>% filter(count <3), aes(), shape = 21) +
# scale_fill_manual(values = col_vec_2, guide = "none") +
theme_bw() +
labs(x = "", y = "", title = manf_codes[number], fill = "") +
theme(legend.position = "bottom")
}
manf_codes %>% unname()
## [1] "Food manufacturing"
## [2] "Chemical manufacturing"
## [3] "Beverage and tobacco product manufacturing"
## [4] "Miscellaneous manufacturing"
## [5] "Textile mills"
## [6] "Textile product mills"
## [7] "Wood product manufacturing"
## [8] "Nonmetallic mineral product manufacturing"
## [9] "Furniture and related product manufacturing"
## [10] "Apparel manufacturing"
## [11] "Leather and allied product manufacturing"
## [12] "Paper manufacturing"
## [13] "Printing and related support activities"
## [14] "Plastics and rubber products manufacturing"
## [15] "Primary metal manufacturing"
## [16] "Fabricated metal product manufacturing"
## [17] "Transportation equipment manufacturing"
## [18] "Petroleum and coal products manufacturing"
## [19] "Electrical equipment, appliance, and component manufacturing"
## [20] "Machinery manufacturing"
## [21] "Computer and electronic product manufacturing"
food_manf <- make_map(1)
## Joining with `by = join_by(input_type)`
chem_manf <- make_map(2)
## Joining with `by = join_by(input_type)`
bev_manf <- make_map(3)
## Joining with `by = join_by(input_type)`
misc_manf <- make_map(4)
## Joining with `by = join_by(input_type)`
text_manf <- make_map(5)
## Joining with `by = join_by(input_type)`
text_prod_manf <- make_map(6)
## Joining with `by = join_by(input_type)`
wood_manf <- make_map(7)
## Joining with `by = join_by(input_type)`
non_metal_min_manf <- make_map(8)
## Joining with `by = join_by(input_type)`
furn_manf <- make_map(9)
## Joining with `by = join_by(input_type)`
apparel_manf <- make_map_simple(10)
## Joining with `by = join_by(input_type)`
leather_manf <- make_map_simple(11)
## Joining with `by = join_by(input_type)`
paper_manf <- make_map(12)
## Joining with `by = join_by(input_type)`
printing <- make_map_simple(13)
## Joining with `by = join_by(input_type)`
plastics_manf <- make_map(14)
## Joining with `by = join_by(input_type)`
metal_manf <- make_map(15)
## Joining with `by = join_by(input_type)`
fab_metal_manf <- make_map(16)
## Joining with `by = join_by(input_type)`
transport_equipment_manf <- make_map(17)
## Joining with `by = join_by(input_type)`
petro_manf <- make_map(18)
## Joining with `by = join_by(input_type)`
ee_appliance_component_manf <- make_map(19)
## Joining with `by = join_by(input_type)`
machine_manf <- make_map(20)
## Joining with `by = join_by(input_type)`
comp_ee_manf <- make_map(21)
## Joining with `by = join_by(input_type)`
Across food, miscellaneous, and beverage manufacturing, the distribution of the use of manufactured inputs is fairly flat, but wide for food manufacturing, higher and shifted right for miscellaneous manufacturing, as well as beverage and tobacco manufacturing. Miscellaneous manufacturing and beverage and tobacco manufacturing both also have a higher ratio of software, financial, legal, and other information services. The use of agricultural/farming as well fishing/hunting inputs is also not surprising.
g1 <- food_manf + theme(axis.text.y = element_blank())
g2 <- misc_manf + theme(axis.text.y = element_blank())
g3 <- bev_manf + theme(axis.text.y = element_blank())
gA <- g1 + g2 + g3
gA
## Picking joint bandwidth of 0.0193
## Picking joint bandwidth of 0.00642
## Picking joint bandwidth of 0.0191
Across chemical, plastics & rubber, and petroleum and coal product manufacturing, both chemical as well as petroleum and coal product manufacturing use mined inputs. One of the subsectors in petroleum and coal product manufacturing uses fewer manufactured inputs. There is some use of warehousing in chemical manufacturing, and both chemical manufacturing as well as plastics and rubber product manufacturing have some sub-sectors with a high use of electricity, water, sewage, gas, and other services.
g4 <- chem_manf + theme(axis.text.y = element_blank())
g5 <- plastics_manf + theme(axis.text.y = element_blank())
g6 <- petro_manf + theme(axis.text.y = element_blank())
gB <- g4 + g5 + g6
gB
## Picking joint bandwidth of 0.00818
## Picking joint bandwidth of 0.00559
## Picking joint bandwidth of 0.0229
Across paper, printing, and leather manufacturing, the use of manufactured inputs, as well as electricity, gas, sewage, water and other services, is rather high, along with the use of software and other financial services. Unsurprisingly, paper manufacturing has high use of forestry and logging. Printing and leather manufacturing only have two and one sub-industry, respectively.
g7 <- paper_manf + theme(axis.text.y = element_blank())
g8 <- printing + theme(axis.text.y = element_blank())
g9 <- leather_manf + theme(axis.text.y = element_blank())
gC <- g7 + g8 + g9
gC
## Picking joint bandwidth of 0.0136
Wood product manufacturing consumes forestry and logging inputs, and not all sub-sectors have a high useage of manufactured inputs, while furniture manufacturing has a high use of manufactured inputs. There is only one sub-sector in apparel manufacturing.
g10 <- wood_manf + theme(axis.text.y = element_blank())
g11 <- furn_manf + theme(axis.text.y = element_blank())
g12 <- apparel_manf + theme(axis.text.y = element_blank())
gD <- g10 + g11 + g12
gD
## Picking joint bandwidth of 0.0157
## Picking joint bandwidth of 0.00512
Across textile mills, textile product mills, and machinery manufacturing, the use of manufactured inputs is high.
g13 <- text_manf + theme(axis.text.y = element_blank())
g14 <- text_prod_manf + theme(axis.text.y = element_blank())
g15 <- machine_manf + theme(axis.text.y = element_blank())
gE <- g13 + g14 + g15
gE
## Picking joint bandwidth of 0.015
## Picking joint bandwidth of 0.0106
## Picking joint bandwidth of 0.00622
Across fab-metal manufacturing, metal manufacturing, and non-metallic minerals manufacturing, the use of metal mining is high in the later two, the use of manufactured inputs is high in fabricated metal manufacturing and more distributed in the other two, and both of the later two also have some subsectors that use eletricity, gas, water, sewage, and other services.
g16 <- fab_metal_manf + theme(axis.text.y = element_blank())
g17 <- metal_manf + theme(axis.text.y = element_blank())
g18 <- non_metal_min_manf + theme(axis.text.y = element_blank())
gF <- g16 + g17 + g18
gF
## Picking joint bandwidth of 0.00442
## Picking joint bandwidth of 0.0177
## Picking joint bandwidth of 0.0173
Across electricial equipment; computer and electonic, and transportation equipment manufacturing, we again see heterogeneity in the use of software type services, as well as manufactured inputs. Electrical equipment as well as computer and electronic manufacturing both use more wholesalers.
g19 <- ee_appliance_component_manf + theme(axis.text.y = element_blank())
g20 <- comp_ee_manf + theme(axis.text.y = element_blank())
g21 <- transport_equipment_manf + theme(axis.text.y = element_blank())
gG <- g19 + g20 + g21
gG
## Picking joint bandwidth of 0.00625
## Picking joint bandwidth of 0.00909
## Picking joint bandwidth of 0.00546
Overall, we can make some broad observations above the distribution of the use of inputs by 6-digit manufacturing sub-sectors. There is wide variation in the use of software, information, financial, legal, and other such services across sub-sectors. Overall, as 3-digit level industries use specific inputs, the heterogeneity of use of specific types of inputs also appears to increase as well.
Other figures to create: - use of imports by manufacturing industry? - total use of intermediary inputs by manufacturing industry?
Measures we might want to calculate or use: - % of industry total inputs a specific commodity is (this measure should preceed the following two steps)
Outcome oriented: what do we want? Relative measures of intensity of use of intermediate materials by subsector??? Soemthing else?? BLEeeh….. very difficult.
supply_table_raw <- read.csv(here("Input Output Data/SUPPLY_TABLE_2017/2017-Table 1.csv"))
We create a crosswalk between industry codes and their descriptions.
supply_cols <- colnames(supply_table_raw)
sup_ind_codes <- supply_table_raw %>%
head(1) %>%
pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "industry_code") %>%
select(industry_code, industry_desc)
The first step will be to convert this data into a long data format.
supply_table_long <- supply_table_raw[-1,] %>%
pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "value", values_drop_na = TRUE) %>%
mutate(value_clean = str_remove_all(value, "[[:punct:]]"),
value_num = as.numeric(value_clean)) %>%
filter(!is.na(value_num)) %>%
rename(commodity_code = 1, commodity_desc = 2) %>%
left_join(sup_ind_codes)
## Joining with `by = join_by(industry_desc)`
The resulting dataframe details how individuals industries MAKE specific commodities. E.G the SUPPLY for specific OUTPUTS from industries. Total output from industry are given by the “diagonal” entry (i.e commodity code = industry code).
Again, we add a few additional descriptive columns to be able to segment and sort our data. Values in this data are then the total amount in millions of dollars of specific outputs produced by specific industries.
supply_table_clean <- supply_table_long %>%
mutate(com_code_3 = as.numeric(str_sub(commodity_code, 1, 3)),
naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>%
left_join(ind_3digit)
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `com_code_3 = as.numeric(str_sub(commodity_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Joining with `by = join_by(naics_3digit)`
As above, we note that some of the commodity codes refer to aggregate measures. The same is true for some of the industry codes.
sup_agg_coms <- supply_table_clean %>% select(commodity_code, commodity_desc, com_code_3) %>% distinct() %>% filter(is.na(com_code_3))
sup_agg_coms <- sup_agg_coms[-c(1:2),]
sup_agg_coms %>% select(1:2)
## # A tibble: 13 × 2
## commodity_code commodity_desc
## <chr> <chr>
## 1 52A000 Monetary authorities and depository credit intermediation
## 2 S00500 Federal general government (defense)
## 3 S00600 Federal general government (nondefense)
## 4 S00102 Other federal government enterprises
## 5 GSLGE State and local government (educational services)
## 6 GSLGH State and local government (hospitals and health services)
## 7 GSLGO State and local government (other services)
## 8 S00203 Other state and local government enterprises
## 9 S00401 Scrap
## 10 S00402 Used and secondhand goods
## 11 S00300 Noncomparable imports
## 12 S00900 Rest of the world adjustment
## 13 T017 Total industry supply
These codes are useful in informing aggregate measures, such as total industry supply. As before, some aggregation over industry codes is necessary as well. Here, aggregation includes the measures of imports per commodity, as well as total commodity output.
sup_agg_ind <- supply_table_clean %>% select(industry_code, industry_desc, naics_3digit) %>% distinct() %>% filter(is.na(naics_3digit))
Because of the way that these databases are constructred, the sup_agg_ind dataframe above refers to commodity level summaries, while the sup_agg_com dataframe above refers to industry level summaries. We spend some time at the moment to create some commodity and industry level benchmarks.
sup_agg_coms %>%
filter(commodity_code %in% c("T017")) %>%
select(1:2)
## # A tibble: 1 × 2
## commodity_code commodity_desc
## <chr> <chr>
## 1 T017 Total industry supply
We begin with creating benchmarks for total supply by industry.
sup_ind_agg <- supply_table_clean %>%
filter(commodity_code %in% c("T017")) %>%
select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>%
pivot_wider(id_cols = c(industry_code, industry_desc), names_from = commodity_code, values_from = value_num)
colnames(sup_ind_agg) <- c("industry_code", "industry_desc", "total_industry_supply")
We now create some commodity level summaries, detailing the total commodity output, as well as total commodity imports
sup_com_agg <- supply_table_clean %>%
filter(industry_code %in% c("T007", "MCIF")) %>%
select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>%
pivot_wider(id_cols = c(commodity_code, commodity_desc), names_from = industry_desc, values_from = value_num)
colnames(sup_com_agg) <- c("commodity_code", "commodity_desc", "total_commodity_output", "total_commodity_imports")
We now have aggregate commodity and industry level summaries, and can simplify our original dataframe. We will also need to perform similar aggregation measures in the supply table (the make table?)
At this point, we might also want to separate out our commodities into some broad categories. While this categorization might be too granular, it at least allows us some baseline to assess industry use of specific types of inputs.
use_table_final <- use_table_clean %>%
filter(!is.na(com_code_3), !is.na(naics_3digit)) %>%
mutate(input_type = case_when(
com_code_3 < 113 ~ "Agriculture and Farming",
com_code_3 == 113 | com_code_3 == 115 ~ "Forestry and Logging",
com_code_3 == 114 ~ "Fishing, Huntinng, Trapping",
com_code_3 == 211 ~ "Oil and Gas Extraction",
com_code_3 == 212 ~ "Metal Mining",
com_code_3 == 213 ~ "Support for Oil, Gas, and Mining",
com_code_3 == 221 ~ "Electricity, Gas, Water, Sewage, and other systems",
com_code_3 == 233 ~ "Healthcare, educational, or vocational structures",
com_code_3 >= 300 & com_code_3 < 400 ~ "Manufactured Output",
com_code_3 >= 400 & com_code_3 < 460 ~ "Other wholesalers, suppliers, retailers",
com_code_3 >= 481 & com_code_3 < 490 ~ "Transportation",
com_code_3 >= 491 & com_code_3 < 493 ~ "Postal Service and other messaging",
com_code_3 >= 500 & com_code_3 < 600 ~ "Software, Information, Financial, Legal, and other service",
com_code_3 == 611 ~ "Junior colleges, colleges, universities, professional schools, and other educational services",
com_code_3 >= 621 & com_code_3 < 700 ~ "Hospitals, medical and diagnostic labs, and other health care services",
com_code_3 == 811 | com_code_3 == 230 ~ "Maintenance & Repair",
com_code_3 == 813 ~ "Grantmaking and Philanthropy",
TRUE ~ "Other Outputs (e.g. arts & recreation; accomodation, etc.)"
)) %>%
mutate(input_type = case_when(
commodity_code == "423100" ~ "Motor vehicle and motor vehicle parts and supplies",
commodity_code == "423800" ~ "Machinery, Equipment, Supplies",
commodity_code == "424700" ~ "Petroleum and Petroleum Products",
commodity_code == "541700" ~ "Scientific research and development services",
TRUE ~ input_type
))